统计函数

1> pct_change()

将每个元素与其前一个元素作比较,并计算相对变化的比例(即 (当前值 - 前一个值) / 前一个值);第一个元素没有前一个元素,因此结果为 NaN

# Start from a short integer Series and show it as-is.
pd_str = pd.Series(data=[1, 2, 4, 5, 3])
print(f'系列:\n{pd_str}')

# Output:
#  系列:
#  0    1
#  1    2
#  2    4
#  3    5
#  4    3
#  dtype: int64

# pct_change() reports, for each element, its change relative to the element
# right before it, expressed as a fraction (1.00 means +100%).
# The first element has no predecessor, so its entry is NaN.
pct_ch = pd_str.pct_change()
print(f'系列前后元素变化百分比:\n{pct_ch}')

# Output:
#  系列前后元素变化百分比:
#  0     NaN
#  1    1.00
#  2    1.00
#  3    0.25
#  4   -0.40
#  dtype: float64

2> 协方差

# Two Series of random standard-normal samples, 20 and 30 values long.
# Series.cov() aligns both operands on their index before computing the
# covariance, so the differing lengths are allowed; only the index labels
# present in both Series (0..19 here) take part in the result.
sr_1, sr_2 = (pd.Series(np.random.randn(size)) for size in (20, 30))
print(f'序列之间的协方差:\n{sr_1.cov(sr_2)}')

# Output (sample run; no seed is set in this snippet, so the number
# differs on every execution):
#  序列之间的协方差:
#  0.12651707822457314

# A 20x3 frame of random standard-normal samples with columns a, b, c.
# DataFrame.cov() returns the pairwise covariance matrix of the columns:
# the diagonal holds each column's variance and the matrix is symmetric.
df = pd.DataFrame(data=np.random.randn(20, 3), columns=list('abc'))
print(f'DataFrame 中序列之间的协方差:\n{df.cov()}')

# Output (sample run; no seed is set in this snippet, so the numbers
# differ on every execution):
#  DataFrame 中序列之间的协方差:
#            a         b         c
#  a  0.768833  0.091362 -0.286527
#  b  0.091362  0.411139 -0.026002
#  c -0.286527 -0.026002  0.449343

3> 相关性

# A 10x3 frame of random standard-normal samples with columns a, b, c.
# DataFrame.corr() returns the pairwise correlation matrix of the columns;
# every column correlates perfectly with itself, hence the 1.0 diagonal.
df = pd.DataFrame(data=np.random.randn(10, 3), columns=list('abc'))
print(df.corr())

# Output (sample run; no seed is set in this snippet, so the numbers
# differ on every execution):
#            a         b         c
#  a  1.000000  0.296371 -0.012415
#  b  0.296371  1.000000 -0.137421
#  c -0.012415 -0.137421  1.000000

# Blank out the first four rows of 'a' and the remaining six rows of 'b'.
# After this, 'a' and 'b' never have a value on the same row, so their
# correlation cannot be computed and shows up as NaN, while each of them
# still correlates with 'c' over the rows where it has data.
df.loc[df.index[:4], 'a'] = np.nan
df.loc[df.index[4:10], 'b'] = np.nan
print(df.corr())

# Output (sample run):
#           a         b         c
#  a  1.00000       NaN  0.035550
#  b      NaN  1.000000 -0.200721
#  c  0.03555 -0.200721  1.000000